x86: Make x32 syscall support conditional on a kernel parameter
author Ben Hutchings <ben@decadent.org.uk>
Fri, 25 Jul 2014 00:16:15 +0000 (01:16 +0100)
committer Ben Hutchings <ben@decadent.org.uk>
Wed, 4 Jan 2017 19:39:36 +0000 (19:39 +0000)
Enabling x32 in the standard amd64 kernel would increase its attack
surface while providing no benefit to the vast majority of its users.
No-one seems interested in regularly checking for vulnerabilities
specific to x32 (at least no-one with a white hat).

Still, adding another flavour just to turn on x32 seems wasteful.  And
the only differences on syscall entry are two instructions (mask out
the x32 flag and compare the syscall number).

So pad the standard comparison with a nop and add a kernel parameter
"syscall.x32" which controls whether this is replaced with the x32
version at boot time.  Add a Kconfig parameter to set the default.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Gbp-Pq: Topic features/x86
Gbp-Pq: Name x86-make-x32-syscall-support-conditional.patch

Documentation/kernel-parameters.txt
arch/x86/Kconfig
arch/x86/entry/common.c
arch/x86/entry/entry_64.S
arch/x86/entry/syscall_64.c
arch/x86/include/asm/elf.h
arch/x86/include/asm/syscall.h

index 46726d4899feec0b46656aeb26894dde398eab8d..5d97dd48cafdabc35e7d4c1679830148ed2271ee 100644 (file)
@@ -3939,6 +3939,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
        switches=       [HW,M68k]
 
+       syscall.x32=    [KNL,x86_64] Enable/disable use of x32 syscalls on
+                       an x86_64 kernel where CONFIG_X86_X32 is enabled.
+                       Default depends on CONFIG_X86_X32_DISABLED.
+
        sysfs.deprecated=0|1 [KNL]
                        Enable/disable old style sysfs layout for old udev
                        on older distributions. When this option is enabled
index 2a1f0ce7c59acac6e1a6543eeda61ccb3792f277..feb598d142527c553fdf2d2a952eb1bf378aeeda 100644 (file)
@@ -2709,6 +2709,14 @@ config X86_X32
          elf32_x86_64 support enabled to compile a kernel with this
          option set.
 
+config X86_X32_DISABLED
+       bool "x32 ABI disabled by default"
+       depends on X86_X32
+       default n
+       help
+         Disable the x32 ABI unless explicitly enabled using the
+         kernel parameter "syscall.x32=y".
+
 config COMPAT
        def_bool y
        depends on IA32_EMULATION || X86_X32
index 1433f6b4607d6f6f38a1415afa02f43ca890d8d7..e8045aad26bc334c2055b5f842c17ab798b0821c 100644 (file)
@@ -272,6 +272,7 @@ __visible void do_syscall_64(struct pt_regs *regs)
 {
        struct thread_info *ti = pt_regs_to_thread_info(regs);
        unsigned long nr = regs->orig_ax;
+       unsigned int syscall_mask, nr_syscalls_enabled;
 
        enter_from_user_mode();
        local_irq_enable();
@@ -284,8 +285,19 @@ __visible void do_syscall_64(struct pt_regs *regs)
         * table.  The only functional difference is the x32 bit in
         * regs->orig_ax, which changes the behavior of some syscalls.
         */
-       if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
-               regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+       if (__SYSCALL_MASK == ~0U || x32_enabled) {
+               syscall_mask = __SYSCALL_MASK;
+               nr_syscalls_enabled = NR_syscalls;
+       } else {
+               /*
+                * x32 syscalls present but not enabled.  Don't mask out
+                * the x32 flag and don't enable any x32-specific calls.
+                */
+               syscall_mask = ~0U;
+               nr_syscalls_enabled = 512;
+       }
+       if (likely((nr & syscall_mask) < nr_syscalls_enabled)) {
+               regs->ax = sys_call_table[nr & syscall_mask](
                        regs->di, regs->si, regs->dx,
                        regs->r10, regs->r8, regs->r9);
        }
index 02fff3ebfb870df602b09435529a13bd2190acc2..760ce6bf0693b55066c16ee8e2679fe3c786e8b8 100644 (file)
@@ -193,8 +193,12 @@ entry_SYSCALL_64_fastpath:
 #if __SYSCALL_MASK == ~0
        cmpq    $__NR_syscall_max, %rax
 #else
-       andl    $__SYSCALL_MASK, %eax
-       cmpl    $__NR_syscall_max, %eax
+.global system_call_fast_compare
+.global system_call_fast_compare_end
+system_call_fast_compare:
+       cmpq    $511, %rax                      /* x32 syscalls start at 512 */
+       .byte   P6_NOP4
+system_call_fast_compare_end:
 #endif
        ja      1f                              /* return -ENOSYS (already in pt_regs->ax) */
        movq    %r10, %rcx
@@ -328,6 +332,16 @@ opportunistic_sysret_failed:
        jmp     restore_c_regs_and_iret
 END(entry_SYSCALL_64)
 
+#if __SYSCALL_MASK != ~0
+       /* This replaces the usual comparisons if syscall.x32 is set */
+.global system_call_mask_compare
+.global system_call_mask_compare_end
+system_call_mask_compare:
+       andl    $__SYSCALL_MASK, %eax
+       cmpl    $__NR_syscall_max, %eax
+system_call_mask_compare_end:
+#endif
+
 ENTRY(stub_ptregs_64)
        /*
         * Syscalls marked as needing ptregs land here.
index 9dbc5abb6162fa20581069499667a8c49b254868..753eeae0a72e0fac8fca10cc20112a686ad6edc0 100644 (file)
@@ -3,8 +3,14 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
+#include <linux/moduleparam.h>
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "syscall."
+#include <linux/bug.h>
+#include <linux/init.h>
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
+#include <asm/text-patching.h>
 
 #define __SYSCALL_64_QUAL_(sym) sym
 #define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
@@ -25,3 +31,36 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
        [0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
+
+#ifdef CONFIG_X86_X32_ABI
+
+/* Maybe enable x32 syscalls */
+
+bool x32_enabled = !IS_ENABLED(CONFIG_X86_X32_DISABLED);
+module_param_named(x32, x32_enabled, bool, 0444);
+
+extern char system_call_fast_compare_end[], system_call_fast_compare[],
+       system_call_mask_compare_end[], system_call_mask_compare[];
+
+static int __init x32_enable(void)
+{
+       BUG_ON(system_call_fast_compare_end - system_call_fast_compare != 10);
+       BUG_ON(system_call_mask_compare_end - system_call_mask_compare != 10);
+
+       if (x32_enabled) {
+               text_poke_early(system_call_fast_compare,
+                               system_call_mask_compare, 10);
+#ifdef CONFIG_X86_X32_DISABLED
+               pr_info("Enabled x32 syscalls\n");
+#endif
+       }
+#ifndef CONFIG_X86_X32_DISABLED
+       else
+               pr_info("Disabled x32 syscalls\n");
+#endif
+
+       return 0;
+}
+late_initcall(x32_enable);
+
+#endif
index e7f155c3045e1b81d6b60e7af3ae7522e8c0f269..ce6342cadea848f6629963a3530e90b880771fed 100644 (file)
@@ -9,6 +9,7 @@
 #include <asm/ptrace.h>
 #include <asm/user.h>
 #include <asm/auxvec.h>
+#include <asm/syscall.h>
 
 typedef unsigned long elf_greg_t;
 
@@ -162,7 +163,7 @@ do {                                                \
 
 #define compat_elf_check_arch(x)                                       \
        (elf_check_arch_ia32(x) ||                                      \
-        (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
+        (x32_enabled && (x)->e_machine == EM_X86_64))
 
 #if __USER32_DS != __USER_DS
 # error "The following code assumes __USER32_DS == __USER_DS"
index 4e23dd15c661fd44c3ed13193ea349fbf39b1fe5..8fb0ca8ead2a81ae7372e3192cf60e1cfaed100a 100644 (file)
@@ -35,6 +35,12 @@ extern const sys_call_ptr_t sys_call_table[];
 extern const sys_call_ptr_t ia32_sys_call_table[];
 #endif
 
+#if defined(CONFIG_X86_X32_ABI)
+extern bool x32_enabled;
+#else
+#define x32_enabled 0
+#endif
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons